libname mylib 'g:\Dropbox\Wall Street Bets (Private)\Data';

*This program creates TAQ_trading, which is referenced by many subsequent programs;



*start with TAQ data from intraday indicator file;



/* taq_oib: one row per sym_root-date with net retail / LR order flow and
   order-imbalance ratios, derived from mylib.taq18_21.

   Output variables (all that are kept):
     retail_trades      retail buy trades minus retail sell trades
     retail_vol         retail buy volume minus retail sell volume
     inst_vol           BUYVOL_LR minus sellVOL_LR
     retail_oib         (buy - sell) / (buy + sell), retail volume
     inst_oib           (buy - sell) / (buy + sell), LR volume
     retail_oib_trades  (buy - sell) / (buy + sell), retail trade counts

   NOTE(review): the original step first set retail_vol / inst_vol to
   log(1 + buy + sell) and then overwrote them with the net (buy - sell)
   values below. Only the net values ever reached the output, so that is what
   is kept here; confirm that net volume (not log total volume) is intended. */
data taq_oib;
    set mylib.taq18_21;
    by sym_root date;   /* input must already be ordered by sym_root date */

    /* Missing retail counts / volumes are treated as zero activity. */
    if BUYNUMTRADES_RETAIL  = . then BUYNUMTRADES_RETAIL  = 0;
    if SELLNUMTRADES_RETAIL = . then SELLNUMTRADES_RETAIL = 0;
    if BUYVOL_RETAIL        = . then BUYVOL_RETAIL        = 0;
    if sellvol_retail       = . then sellvol_retail       = 0;

    /* Net order flow in shares. */
    retail_vol = BUYVOL_RETAIL - sellvol_retail;
    inst_vol   = BUYVOL_LR - sellVOL_LR;

    /* Denominators of the imbalance ratios (temporary, not kept). */
    _den_retail_vol    = BUYVOL_RETAIL + sellvol_retail;
    _den_inst_vol      = BUYVOL_LR + sellVOL_LR;
    _den_retail_trades = BUYNUMTRADES_RETAIL + SELLNUMTRADES_RETAIL;

    /* Order imbalances. A zero denominator yields a missing ratio, exactly as
       before, but without triggering a division-by-zero error in the log. */
    if _den_retail_vol ne 0 then retail_oib = retail_vol / _den_retail_vol;
    else retail_oib = .;

    if _den_inst_vol ne 0 then inst_oib = inst_vol / _den_inst_vol;
    else inst_oib = .;

    if _den_retail_trades ne 0 then
        retail_oib_trades = (BUYNUMTRADES_RETAIL - SELLNUMTRADES_RETAIL) / _den_retail_trades;
    else retail_oib_trades = .;

    /* Net order flow in number of trades. */
    retail_trades = BUYNUMTRADES_RETAIL - SELLNUMTRADES_RETAIL;

    keep date sym_root retail_trades retail_vol inst_vol retail_oib inst_oib retail_oib_trades;
run;



/* test: copy of taq_oib with the three series that feed the rolling-window
   macro duplicated under the short names A, B and C:
     A = retail_trades, B = retail_vol, C = inst_vol.
   For a single-ticker debug run, add:  where sym_root = 'GME';  */
data test;
    set taq_oib;

    array _src[3] retail_trades retail_vol inst_vol;
    array _dst[3] A B C;

    do _k = 1 to 3;
        _dst[_k] = _src[_k];
    end;

    drop _k;
run;



/* calcret: builds test2 from test.

   For each of the series A, B and C the step creates 252 row lags
   (lagX1-lagX252), blanks every lag that would reach back before the first
   row of the current sym_root, and then summarises two trailing windows:
     X21_140  / std_X21_140   mean / std of lags 21 through 140
     X121_240 / std_X121_240  mean / std of lags 121 through 240
   Lags are counted in rows, not calendar days, and test must be ordered by
   sym_root (and by date within sym_root for the windows to be trailing). */
%macro calcret;
    %local j;   /* keep the loop index from leaking into / clobbering outer scope */

    data test2;
        set test;
        by sym_root;

        array lagA[252] lagA1-lagA252;
        array lagB[252] lagB1-lagB252;
        array lagC[252] lagC1-lagC252;

        /* Macro loop: generate the 252 LAGn calls for each series. Each
           LAGn call must execute on every row so its queue stays in step. */
        %do j=1 %to 252;
            lagA&j = lag&j(A);
            lagB&j = lag&j(B);
            lagC&j = lag&j(C);
        %end;

        /* count is the row number within the current sym_root. Lags numbered
           count or higher would point at rows of the previous sym_root, so
           they are set to missing. Once count exceeds 252 nothing is blanked. */
        if first.sym_root then count = 1;
        do i = count to 252;
            lagA[i] = .;
            lagB[i] = .;
            lagC[i] = .;
        end;
        count + 1;

        /* Trailing-window means and standard deviations. */
        A21_140      = mean(of lagA21-lagA140);
        A121_240     = mean(of lagA121-lagA240);
        std_A21_140  = std(of lagA21-lagA140);
        std_A121_240 = std(of lagA121-lagA240);

        B21_140      = mean(of lagB21-lagB140);
        B121_240     = mean(of lagB121-lagB240);
        std_B21_140  = std(of lagB21-lagB140);
        std_B121_240 = std(of lagB121-lagB240);

        C21_140      = mean(of lagC21-lagC140);
        C121_240     = mean(of lagC121-lagC240);
        std_C21_140  = std(of lagC21-lagC140);
        std_C121_240 = std(of lagC121-lagC240);

        /* Keep only the identifiers, the raw series and the window statistics. */
        keep sym_root date
             A A21_140 A121_240 std_A21_140 std_A121_240
             B B21_140 B121_240 std_B21_140 std_B121_240
             C C21_140 C121_240 std_C21_140 std_C121_240;
    run;   /* explicit step boundary: the macro no longer depends on what follows it */
%mend calcret;  /* End of macro  */
%calcret;       /* Run the macro */

proc sort data=test2;
    by sym_root date;
run;





/* frenzies: standardises A (retail_trades), B (retail_vol) and C (inst_vol)
   against their trailing windows from test2:
     z = (value - window mean) / window std
   The suffix 21 uses the lag 21-140 window, the suffix 121 the lag 121-240 window.
   A window std of zero (constant window) or missing gives a missing z-score.
   The explicit guards produce the same values as an unguarded division but
   avoid division-by-zero errors in the log. */
data frenzies;
    set test2;

    if std_A21_140 > 0 then std_retail_trades21 = (A - A21_140) / std_A21_140;
    else std_retail_trades21 = .;
    if std_A121_240 > 0 then std_retail_trades121 = (A - A121_240) / std_A121_240;
    else std_retail_trades121 = .;

    if std_B21_140 > 0 then std_retail_VOL21 = (B - B21_140) / std_B21_140;
    else std_retail_VOL21 = .;
    if std_B121_240 > 0 then std_retail_VOL121 = (B - B121_240) / std_B121_240;
    else std_retail_VOL121 = .;

    if std_C21_140 > 0 then std_INST_VOL21 = (C - C21_140) / std_C21_140;
    else std_INST_VOL21 = .;
    if std_C121_240 > 0 then std_INST_VOL121 = (C - C121_240) / std_C121_240;
    else std_INST_VOL121 = .;

    keep sym_root date
         std_retail_trades21 std_retail_trades121
         std_retail_VOL21 std_retail_VOL121
         std_INST_VOL21 std_INST_VOL121;
run;

/* Order both inputs by the merge key. */
proc sort data=frenzies;
    by sym_root date;
run;

proc sort data=taq_oib;
    by sym_root date;
run;

/* trading: order-imbalance measures joined with the standardised series.
   Rows without a 21-140 window z-score for retail trades are dropped, and
   ticker is added as a copy of sym_root for the later merges by ticker. */
data trading;
    merge taq_oib
          frenzies;
    by sym_root date;

    if std_retail_trades21 ne .;

    ticker = sym_root;
run;

*bring in information on DD posts from the daily panel;
/* INFO: subset of mylib.daily_panel_rfs restricted to cum_month 6 through 42,
   carrying the return, post-count and control variables listed in the KEEP
   statement. sym_root is added as a copy of ticker. */
data info;
    set mylib.daily_panel_rfs;

    keep sym_root ticker date cum_month gme_amc_flag post_gme
         ret5 ret21
         net_dd2 net_dd2_post net_sa2 net_sa2_post non_research2 non_research2_post
         dd_and_sa dd_and_sa_post dd_and_non_research dd_and_non_research_post net_dd2_pre
         ln_size ln_bm abn_ret mom5 mom6_26
         bm_missing size_missing news_sentiment lag_sent5 lag_sent6_26 sentiment21;

    /* Subsetting IF: rows outside months 6-42 (or with missing cum_month) are dropped. */
    if cum_month >= 6 and cum_month <= 42;

    sym_root = ticker;
run;



/* news: ticker / date / news_rank taken from mylib.media_coverage_rank,
   with date populated from that table's date2 column. */
data news;
    set mylib.media_coverage_rank;
    keep ticker date news_rank;

    date = date2;
run;


*combine all datasets;


/* Put the three inputs in date-ticker order for the merge. */
proc sort data=trading;
    by date ticker;
run;

proc sort data=info;
    by date ticker;
run;

proc sort data=news;
    by date ticker;
run;

/* trading2: trading, info and news combined on date-ticker.
   A row survives only if ret5 is non-missing and cum_month lies in 7-42.
   NOTE(review): info was cut at cum_month 6-42 but this step keeps 7-42;
   confirm the narrower range here is intended. */
data trading2;
    merge trading
          info
          news;
    by date ticker;

    if ret5 ne . and 7 <= cum_month <= 42;
run;


/* Re-order by ticker-date and keep one row per ticker-date. */
proc sort nodupkey data=trading2;
    by ticker date;
run;

/* trading3: regression panel with dependent variables, their one-row lags,
   social-media measures and controls.

   Dependent variables:
     1) retail_oib_trades   2) retail_oib          3) inst_oib
     4) std_retail_trades121 5) std_retail_VOL121  6) std_inst_vol121

   Lag handling: LAG() returns the value from the previous ROW of the data
   set, regardless of BY groups. Every lagged variable is therefore reset to
   missing on the first row of each ticker so that a ticker never inherits
   the last observation of the ticker sorted before it. The LAG() calls
   themselves stay unconditional so their queues are updated on every row.
   Lags are row-based; gaps in a ticker's dates are not detected here. */
data trading3;
    set trading2;
    by ticker date;

    /* One-row lags of the dependent variables. */
    l1_retail_oib           = lag(retail_oib);
    l1_retail_oib_trades    = lag(retail_oib_trades);
    l1_inst_oib             = lag(inst_oib);
    l1_std_retail_trades121 = lag(std_retail_trades121);
    l1_std_retail_VOL121    = lag(std_retail_VOL121);
    l1_std_inst_vol121      = lag(std_inst_vol121);

    if first.ticker then
        call missing(l1_retail_oib, l1_retail_oib_trades, l1_inst_oib,
                     l1_std_retail_trades121, l1_std_retail_VOL121, l1_std_inst_vol121);

    /* Absolute momentum controls. */
    abs_mom5    = abs(mom5);
    abs_mom6_26 = abs(mom6_26);

    /* mom1 is the previous row's abn_ret within the same ticker. */
    mom1 = lag(abn_ret);
    if first.ticker then mom1 = .;
    abs_mom1 = abs(mom1);

    /* News coverage: heavy_news flags news_rank of 90 or more; a missing
       news_rank is flagged in news_missing and recoded to 0 in news_rank2. */
    if news_rank >= 90 then heavy_news = 1;
    else heavy_news = 0;

    if news_rank = . then news_missing = 1;
    else news_missing = 0;

    news_rank2 = news_rank;
    if news_rank = . then news_rank2 = 0;

    /* Sentiment controls; lag_sent1 is the previous row's news_sentiment
       within the same ticker. */
    abs_lag_sent5    = abs(lag_sent5);
    abs_lag_sent6_26 = abs(lag_sent6_26);

    lag_sent1 = lag(news_sentiment);
    if first.ticker then lag_sent1 = .;
    abs_lag_sent1 = abs(lag_sent1);

    /* Pre-period interactions: the measure times (1 - post_gme). */
    NET_DD2_PRE       = NET_DD2 * (1 - POST_GME);
    NET_SA2_PRE       = NET_SA2 * (1 - POST_GME);
    NON_RESEARCH2_PRE = NON_RESEARCH2 * (1 - POST_GME);

    /* Variables kept, by group:
         group 1  identifying variables
         group 2  dependent variables
         group 3  lagged dependent variables
         group 4  social media measures
         group 5  other controls */
    keep
        /* 1 */ ticker date cum_month post_gme gme_amc_flag
        /* 2 */ retail_oib_trades retail_oib inst_oib
                std_retail_trades121 std_retail_VOL121 std_INST_VOL121
        /* 3 */ l1_retail_oib_trades l1_retail_oib l1_inst_oib
                l1_std_retail_trades121 l1_std_retail_VOL121 l1_std_INST_VOL121
        /* 4 */ net_dd2 NET_DD2_POST NET_SA2 NET_SA2_POST NON_RESEARCH2 NON_RESEARCH2_POST
                DD_and_SA DD_and_SA_POST DD_and_Non_Research DD_and_Non_Research_post
                net_dd2_pre NET_SA2_PRE NON_RESEARCH2_PRE
        /* 5 */ ln_size ln_bm mom1 mom5 mom6_26 lag_sent1 lag_sent5 lag_sent6_26
                abs_mom1 abs_mom5 abs_mom6_26 abs_lag_sent1 abs_lag_sent5 abs_lag_sent6_26
                heavy_news news_rank2 news_missing size_missing bm_missing;
run;


/* PROC RANK with a BY statement needs the data ordered by date. */
proc sort data=trading3;
    by date;
run;

/* Within each date, replace the dependent variables and their lags by their
   rank group out of 100 (GROUPS=100 gives values 0-99). No RANKS statement is
   given, so the ranks overwrite the original values in trading3. */
proc rank data=trading3 out=trading3 groups=100;
    by date;
    var retail_oib_trades    l1_retail_oib_trades
        retail_oib           l1_retail_oib
        inst_oib             l1_inst_oib
        std_retail_trades121 l1_std_retail_trades121
        std_retail_VOL121    l1_std_retail_VOL121
        std_INST_VOL121      l1_std_INST_VOL121;
run;


/* mylib.taq_trading: final panel written to the permanent library.
   A row is dropped when any of the three order-imbalance variables or any of
   the three lagged standardised variables is missing. */
data mylib.taq_trading;
    set trading3;

    if retail_oib_trades       = .
    or inst_oib                = .
    or retail_oib              = .
    or l1_std_retail_trades121 = .
    or l1_std_retail_vol121    = .
    or l1_std_inst_vol121      = .
    then delete;
run;

